rm(list = ls())
require(stats4)
require(nlWaldTest)
require("multiwayvcov")
require(msm)
require(stargazer)
require(mfx)

data21=read.csv("C:/Users/naten/OneDrive - University of Tennessee/Dean Stuff/data/columbia_experiment_december6noon_type7clean.csv", header=TRUE)
data22=read.csv("C:/Users/naten/OneDrive - University of Tennessee/Dean Stuff/data/columbia_experiment_december15_type7clean.csv", header=TRUE)
data23=read.csv("C:/Users/naten/OneDrive - University of Tennessee/Dean Stuff/data/columbia_experiment_january26_type7(clean).csv", header=TRUE)


dataframe21=data.frame(data21)
dataframe22=data.frame(data22)
dataframe23=data.frame(data23)

#some definitions
expframe=rbind(dataframe21,dataframe22,dataframe23)
expframe=subset(expframe,uid>800)
expframe$correct <-(expframe$state==4&(expframe$response==2|expframe$response==7|expframe$response==8|expframe$response==9))|(expframe$state==3&(expframe$response==1|expframe$response==6|expframe$response==3|expframe$response==5))
expframe$incentive <- (expframe$qid==5)*5+(expframe$qid==1)*40+(expframe$qid==6)*70+(expframe$qid==7)*95
accuracy=c(sum((expframe$incentive==5)*expframe$correct)/sum(expframe$incentive==5),sum((expframe$incentive==40)*expframe$correct)/sum(expframe$incentive==40),sum((expframe$incentive==70)*expframe$correct)/sum(expframe$incentive==70),sum((expframe$incentive==95)*expframe$correct)/sum(expframe$incentive==95))
incentivec=c(5,40,70,95)
uidlist=unique(expframe$uid)

#check N
#**
length(uidlist)
#**


#for ease of factor use
expframe$incentive5=(expframe$incentive==5)
expframe$incentive40=(expframe$incentive==40)
expframe$incentive70=(expframe$incentive==70)
expframe$incentive95=(expframe$incentive==95)

#Check NIAS 
expframe$Bresponse <-(expframe$response==2|expframe$response==7|expframe$response==8|expframe$response==9)
expframe$Bstate <- expframe$state==4
expframe$Aresponse <-(expframe$response==1|expframe$response==6|expframe$response==3|expframe$response==5)
expframe$Astate <- expframe$state==3

#data cleaning
chooseB=vector('numeric')
for(i in 1:length(uidlist)){
playframe=subset(expframe,uid==uidlist[i])
chooseB[i]=(sum(playframe$Bresponse)>5)
}

zeroedlist=chooseB*uidlist
cleanlist=zeroedlist[zeroedlist>0]



#Base Regression
expframe$incentivedec=expframe$incentive/100
modelNIAS=lm(Aresponse~factor(Astate+incentivedec)+0,data=expframe)

#Adjust COV
vcovmodNIAS=vcovHC(modelNIAS, type="HC3")
vcovmodNIAScluster=cluster.vcov(modelNIAS, expframe$uid)

#for state A
A1=modelNIAS$coefficients[5]
A2=modelNIAS$coefficients[6]
A3=modelNIAS$coefficients[7]
A4=modelNIAS$coefficients[8]

#for state B
B1=1-modelNIAS$coefficients[1]
B2=1-modelNIAS$coefficients[2]
B3=1-modelNIAS$coefficients[3]
B4=1-modelNIAS$coefficients[4]

#for the lambda estimates
lambdaA=vector('numeric')
lambdahighA=vector('numeric')
lambdalowA=vector('numeric')
incentive=c(5,40,70,95)

#Lets do cluster standard errors
N=100
n=length(uidlist)
randomdraw=sample(1:n,n*N,replace=TRUE)
predictframedat=data.frame(acc5=vector('numeric'),acc40=vector('numeric'),acc70=vector('numeric'),acc95=vector('numeric'))
predictframelin=data.frame(acc5=vector('numeric'),acc40=vector('numeric'),acc70=vector('numeric'),acc95=vector('numeric'),kappa=vector("numeric"))
predictframegen=data.frame(acc5=vector('numeric'),acc40=vector('numeric'),acc70=vector('numeric'),acc95=vector('numeric'),kappa=vector("numeric"),rho=vector("numeric"))
predictframepow=data.frame(acc5=vector('numeric'),acc40=vector('numeric'),acc70=vector('numeric'),acc95=vector('numeric'),kappa=vector("numeric"),rho=vector("numeric"))
predictframeexp=data.frame(acc5=vector('numeric'),acc40=vector('numeric'),acc70=vector('numeric'),acc95=vector('numeric'),kappa=vector("numeric"),rho=vector("numeric"))

for(k in 1:N){
bootframe=data.frame()
for(l in 1:n){
addframe=subset(expframe,uid==uidlist[randomdraw[(k-1)*n+l]])
bootframe=rbind(bootframe,addframe)
}

boottotcor=c(sum(bootframe$correct*(bootframe$incentive==5)),sum(bootframe$correct*(bootframe$incentive==40)),sum(bootframe$correct*(bootframe$incentive==70)),sum(bootframe$correct*(bootframe$incentive==95)))
boottotinc=c(sum((1-bootframe$correct)*(bootframe$incentive==5)),sum((1-bootframe$correct)*(bootframe$incentive==40)),sum((1-bootframe$correct)*(bootframe$incentive==70)),sum((1-bootframe$correct)*(bootframe$incentive==95)))

acculin=function(kappa){
accuvec=vector('numeric')
for(i in 1:4){
cost=function(accu){
return(kappa*(accu*log(accu)+(1-accu)*log(1-accu)))
}
negpayoff=function(accu){
return(-1*(0.4*incentivec[i]*accu-cost(accu)))
}
optimout=optimize(negpayoff, maximum=FALSE,upper=0.99, lower=0.51)
accuvec[i]=optimout$minimum
}
return(accuvec)
}

bootlikelihoodlin=function(kappa){
accuvec=acculin(kappa)
like=sum(boottotcor*log(accuvec)+boottotinc*log(1-accuvec))
return(-like)
}

bootmodellin=mle(bootlikelihoodlin,start=list(kappa=20), method="L-BFGS-B",lower=c(0.5))

bootkappa=coef(bootmodellin)[1]
addframe=data.frame(acc5=acculin(bootkappa)[1],acc40=acculin(bootkappa)[2],acc70=acculin(bootkappa)[3],acc95=acculin(bootkappa)[4],kappa=bootkappa)
predictframelin=rbind(predictframelin,addframe)

################################
#General Entropy Model

accugen=function(kappa,rho){
accuvec=vector('numeric')
for(i in 1:4){

cost=function(accu){
if(rho==1){
totcost=kappa*(accu*log(accu)+(1-accu)*log(1-accu))
}
if(rho==2){
totcost=kappa*(-1/2*(log(accu)+log(1-accu))-2*log(2))
}
if(rho!=1&rho!=2){
totcost=kappa*(2^(1-rho)/((rho-1)*(rho-2))*(accu*accu^(1-rho)+(1-accu)*(1-accu)^(1-rho))-1/((rho-1)*(rho-2))-log(2))
}
return(totcost)
}


negpayoff=function(accu){
return(-1*(0.4*incentivec[i]*accu-cost(accu)))
}

optimout=optimize(negpayoff, maximum=FALSE,upper=0.99, lower=0.51)
accuvec[i]=optimout$minimum
}
return(accuvec)
}

bootlikelihoodgen=function(kappa,rho){
accuvec=accugen(kappa,rho)
like=sum(boottotcor*log(accuvec)+boottotinc*log(1-accuvec))
return(-like)
}

#bootmodelgen=mle(bootlikelihoodgen,start=list(kappa=0.0021,rho=14), method="L-BFGS-B",lower=c(0,0))

bootmodelgen=mle(bootlikelihoodgen,start=list(kappa=0.21,rho=14), method="L-BFGS-B",lower=c(0.001,0.001))

#bootmodelgen=mle(bootlikelihoodgen,start=list(kappa=0.021,rho=14), method="L-BFGS-B",lower=c(0.000001,5),control=list(trace=4))
#tryCatch({
#bootmodelgen=mle(bootlikelihoodgen,start=list(kappa=0.021,rho=14), method="L-BFGS-B",lower=c(0.000001,5),upper=c(50,50))
#},error=function(e){})


bootkappa=coef(bootmodelgen)[1]
bootrho=coef(bootmodelgen)[2]


addframe=data.frame(acc5=accugen(bootkappa,bootrho)[1],acc40=accugen(bootkappa,bootrho)[2],acc70=accugen(bootkappa,bootrho)[3],acc95=accugen(bootkappa,bootrho)[4],kappa=bootkappa, rho=bootrho)
predictframegen=rbind(predictframegen,addframe)

####################################
#Power mutual information

accupow=function(kappa,rho){
accuvec=vector('numeric')
for(i in 1:4){

cost=function(accu){
totcost=(accu*log(accu)+(1-accu)*log(1-accu)-2*0.5*log(0.5))
if(rho>0){
return(totcost^rho)
}else{
return(-1*totcost^rho)
}
}

negpayoff=function(accu){
return(-1*(0.4*incentivec[i]*accu-kappa*cost(accu)))
}

optimout=optimize(negpayoff, maximum=FALSE,upper=0.99, lower=0.51)
accuvec[i]=optimout$minimum
}
return(accuvec)
}

bootlikelihoodpow=function(kappa,rho){
accuvec=accupow(kappa,rho)
like=sum(boottotcor*log(accuvec)+boottotinc*log(1-accuvec))
return(-like)
}

bootmodelpow=mle(bootlikelihoodpow,start=list(kappa=7728,rho=4.23), method="L-BFGS-B",lower=c(0.001,-100),upper=c(10000,100))


#bootmodelpow=mle(bootlikelihoodpow,start=list(kappa=0.021,rho=14), method="L-BFGS-B",lower=c(0.000001,5),control=list(trace=4))
#tryCatch({
#bootmodelpow=mle(bootlikelihoodpow,start=list(kappa=0.021,rho=14), method="L-BFGS-B",lower=c(0.000001,5),upper=c(50,50))
#},error=function(e){})


bootkappa=coef(bootmodelpow)[1]
bootrho=coef(bootmodelpow)[2]

addframe=data.frame(acc5=accupow(bootkappa,bootrho)[1],acc40=accupow(bootkappa,bootrho)[2],acc70=accupow(bootkappa,bootrho)[3],acc95=accupow(bootkappa,bootrho)[4],kappa=bootkappa, rho=bootrho)
predictframepow=rbind(predictframepow,addframe)

####################################
#Exponential Mutual Information
accuexp=function(kappa,rho){
accuvec=vector('numeric')
for(i in 1:4){

cost=function(accu){
totcost=(accu*log(accu)+(1-accu)*log(1-accu))
return(exp(rho*totcost))
}

negpayoff=function(accu){
return(-1*(0.4*incentivec[i]*accu-kappa*cost(accu)))
}

optimout=optimize(negpayoff, maximum=FALSE,upper=0.99, lower=0.51)
accuvec[i]=optimout$minimum
}
return(accuvec)
}

bootlikelihoodexp=function(kappa,rho){
accuvec=accuexp(kappa,rho)
like=sum(boottotcor*log(accuvec)+boottotinc*log(1-accuvec))
return(-like)
}

bootmodelexp=mle(bootlikelihoodexp,start=list(kappa=10 ,rho=2 ), method="L-BFGS-B",lower=c(0.001,0.001),upper=c(10000,100))

bootkappa=coef(bootmodelexp)[1]
bootrho=coef(bootmodelexp)[2]

addframe=data.frame(acc5=accuexp(bootkappa,bootrho)[1],acc40=accuexp(bootkappa,bootrho)[2],acc70=accuexp(bootkappa,bootrho)[3],acc95=accuexp(bootkappa,bootrho)[4],kappa=bootkappa, rho=bootrho)
predictframeexp=rbind(predictframeexp,addframe)

bootdatvec=boottotcor/(boottotcor+boottotinc)
addframe=data.frame(acc5=bootdatvec[1],acc40=bootdatvec[2],acc70=bootdatvec[3],acc95=bootdatvec[4])
predictframedat=rbind(predictframedat,addframe)

print(k)
}

#now with the basic data
totcor=c(sum(expframe$correct*(expframe$incentive==5)),sum(expframe$correct*(expframe$incentive==40)),sum(expframe$correct*(expframe$incentive==70)),sum(expframe$correct*(expframe$incentive==95)))
totinc=c(sum((1-expframe$correct)*(expframe$incentive==5)),sum((1-expframe$correct)*(expframe$incentive==40)),sum((1-expframe$correct)*(expframe$incentive==70)),sum((1-expframe$correct)*(expframe$incentive==95)))

#find the multinomial constant
multinom=function(vec){
return(sum(log(1:sum(vec)))-sum(log(c(1:vec[1],1:vec[2]))))
}
multinomvec=vector("numeric")
for(i in 1:4){
multinomvec[i]=multinom(c(totcor[i],totinc[i]))
}
multinomconst=sum(multinomvec)
#multinomconst=0

#Linear Mutual Information
likelihoodlin=function(kappa){
accuvec=acculin(kappa)
like=multinomconst+sum(totcor*log(accuvec)+totinc*log(1-accuvec))
return(-like)
}

#data CI
fitdathigh=vector("numeric")
fitdathigh[1]=sort(predictframedat$acc5)[95]
fitdathigh[2]=sort(predictframedat$acc40)[95]
fitdathigh[3]=sort(predictframedat$acc70)[95]
fitdathigh[4]=sort(predictframedat$acc95)[95]

fitdatlow=vector("numeric")
fitdatlow[1]=sort(predictframedat$acc5)[5]
fitdatlow[2]=sort(predictframedat$acc40)[5]
fitdatlow[3]=sort(predictframedat$acc70)[5]
fitdatlow[4]=sort(predictframedat$acc95)[5]

modellin=mle(likelihoodlin,start=list(kappa=90), method="L-BFGS-B",lower=c(0.01))
datavec=totcor/(totcor+totinc)

vcovest=vcov(modellin)
kappaest=coef(modellin)[1]

fitlin=acculin(kappaest)

fitlinhigh=vector("numeric")
fitlinhigh[1]=sort(predictframelin$acc5)[95]
fitlinhigh[2]=sort(predictframelin$acc40)[95]
fitlinhigh[3]=sort(predictframelin$acc70)[95]
fitlinhigh[4]=sort(predictframelin$acc95)[95]

fitlinlow=vector("numeric")
fitlinlow[1]=sort(predictframelin$acc5)[5]
fitlinlow[2]=sort(predictframelin$acc40)[5]
fitlinlow[3]=sort(predictframelin$acc70)[5]
fitlinlow[4]=sort(predictframelin$acc95)[5]


#General entropy
likelihoodgen=function(kappa,rho){
accuvec=accugen(kappa,rho)
like=multinomconst+sum(totcor*log(accuvec)+totinc*log(1-accuvec))
return(-like)
}

modelgen=mle(likelihoodgen,start=list(kappa=1,rho=10), method="L-BFGS-B",lower=c(0.01,0.1),upper=c(100,1000))

kappaest=coef(modelgen)[1]
rhoest=coef(modelgen)[2]

fitgen=accugen(kappaest,rhoest)

fitgenhigh=vector('numeric')
fitgenlow=vector('numeric')

fitgenhigh[1]=sort(predictframegen$acc5)[95]
fitgenhigh[2]=sort(predictframegen$acc40)[95]
fitgenhigh[3]=sort(predictframegen$acc70)[95]
fitgenhigh[4]=sort(predictframegen$acc95)[95]

fitgenlow[1]=sort(predictframegen$acc5)[5]
fitgenlow[2]=sort(predictframegen$acc40)[5]
fitgenlow[3]=sort(predictframegen$acc70)[5]
fitgenlow[4]=sort(predictframegen$acc95)[5]

p1givenA=vector('numeric')
p1givenA[1]=sum(expframe$Astate*expframe$Aresponse*(expframe$incentive==5))/sum(expframe$Aresponse*(expframe$incentive==5))
p1givenA[2]=sum(expframe$Astate*expframe$Aresponse*(expframe$incentive==40))/sum(expframe$Aresponse*(expframe$incentive==40))
p1givenA[3]=sum(expframe$Astate*expframe$Aresponse*(expframe$incentive==70))/sum(expframe$Aresponse*(expframe$incentive==70))
p1givenA[4]=sum(expframe$Astate*expframe$Aresponse*(expframe$incentive==95))/sum(expframe$Aresponse*(expframe$incentive==95))

p2givenB=vector('numeric')
p2givenB[1]=sum(expframe$Bstate*expframe$Bresponse*(expframe$incentive==5))/sum(expframe$Bresponse*(expframe$incentive==5))
p2givenB[2]=sum(expframe$Bstate*expframe$Bresponse*(expframe$incentive==40))/sum(expframe$Bresponse*(expframe$incentive==40))
p2givenB[3]=sum(expframe$Bstate*expframe$Bresponse*(expframe$incentive==70))/sum(expframe$Bresponse*(expframe$incentive==70))
p2givenB[4]=sum(expframe$Bstate*expframe$Bresponse*(expframe$incentive==95))/sum(expframe$Bresponse*(expframe$incentive==95))


#**********************************
plot(incentivec, fitlin, main="Incentive v Accuracy with Predicted Expansion Path", 
  	xlab="Incentive ", ylab="Accuracy ",ylim=c(0.5, 0.9),pch=20,xlim=c(0,101))
rect(xleft=incentivec-2,xright=incentivec+2,ybottom=0,ytop=datavec,col="aliceblue")

points(incentivec, fitlin,pch=20)
lines(incentivec, fitlin, lty=1)
points(incentivec,fitgen,pch=2)
lines(incentivec,fitgen,lty=1)

lines(incentivec,fitlinlow,lty=2)
lines(incentivec,fitlinhigh,lty=2)
lines(incentivec,fitgenlow,lty=3)
lines(incentivec,fitgenhigh,lty=3)
axis(2)
axis(1)
box(bty="l")
legend(x=60,y=0.9,legend=c("Shannon","General Entropy","Accuracy"), lwd=c(1,1,10), pch=c(20,2,15),col=c("black","black","aliceblue"))

#**********************************

#Power Entropy
likelihoodpow=function(kappa,rho){
accuvec=accupow(kappa,rho)
like=multinomconst+sum(totcor*log(accuvec)+totinc*log(1-accuvec))
return(-like)
}

modelpow=mle(likelihoodpow,start=list(kappa=7728,rho=4.23), method="L-BFGS-B",lower=c(0.001,-100),upper=c(10000,100))

kappaest=coef(modelpow)[1]
rhoest=coef(modelpow)[2]

fitpow=accupow(kappaest,rhoest)


fitpowhigh=vector('numeric')
fitpowlow=vector('numeric')

fitpowhigh[1]=sort(predictframepow$acc5)[95]
fitpowhigh[2]=sort(predictframepow$acc40)[95]
fitpowhigh[3]=sort(predictframepow$acc70)[95]
fitpowhigh[4]=sort(predictframepow$acc95)[95]

fitpowlow[1]=sort(predictframepow$acc5)[5]
fitpowlow[2]=sort(predictframepow$acc40)[5]
fitpowlow[3]=sort(predictframepow$acc70)[5]
fitpowlow[4]=sort(predictframepow$acc95)[5]


#**********************************
plot(incentivec, fitlin, main="Incentive v Accuracy with Predicted Expansion Path", 
  	xlab="Incentive ", ylab="Accuracy ",ylim=c(0.5, 0.9),pch=20,xlim=c(0,101))
rect(xleft=incentivec-2,xright=incentivec+2,ybottom=0,ytop=datavec,col="aliceblue")

points(incentivec, fitlin,pch=20)
lines(incentivec, fitlin, lty=1)
points(incentivec,fitpow,pch=2)
lines(incentivec,fitpow,lty=1)

lines(incentivec,fitlinlow,lty=2)
lines(incentivec,fitlinhigh,lty=2)
lines(incentivec,fitpowlow,lty=3)
lines(incentivec,fitpowhigh,lty=3)
axis(2)
axis(1)
box(bty="l")
legend(x=60,y=0.9,legend=c("Shannon","Power Mutual Info","Accuracy"), lwd=c(1,1,10), pch=c(20,2,15),col=c("black","black","aliceblue"))


#**********************************





